---
output: html_document
editor_options: 
  chunk_output_type: console
---

```{r packages and set wd}

library(tidyverse)
library(lubridate)
library(fuzzyjoin)
library(zoo)
library(haven) 
library(readxl)

# Negated %in%: TRUE for elements of the left-hand side that do not appear in
# the right-hand side.
`%nin%` <- Negate(`%in%`)

# Shared ggplot2 theme for the figures in this document.
# NOTE: the object shares its name with ggplot2's theme() function. Calls such
# as theme(...) still reach the function because R ignores non-function
# bindings when resolving a call.
theme <- theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        legend.position = "top",
        strip.background = element_blank(),
        panel.background = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_line(color = "seashell1"),
        plot.background = element_blank(),
        axis.text.x = element_text(size = 8),
        legend.text = element_text(size = 8),
        axis.title.x = element_text(size = 8),
        axis.title.y = element_text(size = 8))

# Create the output folders. recursive = TRUE also creates a missing parent
# (e.g. "data"), and showWarnings = FALSE keeps re-runs quiet when the folders
# already exist.
invisible(lapply(
  c("data/transformed", "outputs", "outputs/figures", "outputs/tables"),
  dir.create,
  recursive = TRUE,
  showWarnings = FALSE
))

```

## Download lobby data
```{r}

# To refresh the raw files, uncomment the three lines below: they download the
# registry archive and unpack it into data/raw/.
# temp <- tempfile()
# download.file("https://lobbycanada.gc.ca/od-do/Communications_OCL_CAL.zip",temp)
# unzip(temp, exdir = "data/raw/")

# Read the two communication exports from disk.
Communication_DpohExport <- read_csv("data/raw/Communication_DpohExport.csv")
Communication_PrimaryExport <- read_csv("data/raw/Communication_PrimaryExport.csv")

# Keep only communications present in both exports, matched on COMLOG_ID.
combined_data <- Communication_DpohExport %>%
  inner_join(Communication_PrimaryExport, by = "COMLOG_ID")

```

```{r}

# Histogram of lobbyist contacts over time, split into contacts with
# parliamentarians (Senators and MPs) versus every other office-holder title
# (labelled "Civil Servants").
comparison_plot <- combined_data %>%
  mutate(date = as_date(COMM_DATE),
         group = ifelse(DPOH_TITLE_TITRE_TCPD %in% c("Senator", "Member of Parliament"),
                        "Parliamentarian", "Civil Servants")) %>%
  # Compare Date with Date rather than relying on implicit string coercion
  filter(date >= as.Date("2010-09-01"), date < as.Date("2019-09-01")) %>%
  ggplot(aes(x = date, fill = group)) +
  # On a Date axis the bin width is expressed in days
  geom_histogram(col = "white", binwidth = 108) +
  facet_wrap(group ~ .) +
  theme +
  scale_fill_manual(values = c("slategrey", "black")) +
  # "none" replaces the deprecated guides(fill = FALSE)
  guides(fill = "none") +
  labs(x = "Year", y = "Number of lobbyist contacts")

comparison_plot

# Pass the plot explicitly so the saved file never depends on whichever plot
# happened to be drawn last.
ggsave("outputs/figures/comparison.png", plot = comparison_plot,
       width = 7.5, height = 4)

```

## Keep only Senators and MPs
```{r}

# Restrict the communications to Senators and MPs, keep the columns used
# downstream under shorter names, and derive:
#   lobby_name - "<last name>_<first name>" as entered in the registry
#   year       - four-digit year of the communication, as a string
# Communications dated on or after 2019-09-11 are dropped.
lobby_data <- combined_data %>%
  filter(DPOH_TITLE_TITRE_TCPD %in% c("Senator","Member of Parliament")) %>%
  transmute(id = COMLOG_ID,
            date = as.Date(COMM_DATE),
            lobbied_ln = DPOH_LAST_NM_TCPD,
            lobbied_fn = DPOH_FIRST_NM_PRENOM_TCPD,
            lobbied_title = DPOH_TITLE_TITRE_TCPD,
            lobby_id = CLIENT_ORG_CORP_NUM,
            lobby_org_english = EN_CLIENT_ORG_CORP_NM_AN,
            lobby_org_french = FR_CLIENT_ORG_CORP_NM,
            lobbied_org = INSTITUTION,
            lobbied_org_other = OTHER_INSTITUTION_AUTRE,
            lobbied_branch = BRANCH_UNIT_DIRECTION_SERVICE,
            lobby_name = paste(lobbied_ln, lobbied_fn, sep = "_")) %>%
  filter(date < "2019-09-11") %>%
  mutate(year = format(date, "%Y"))

```

## Lobbied data 

Rompkey, Bill
Robichaud, Fernand
Hervieux-Payette, Céline
Doyle, Norman
Baker, George

```{r}

# Source: https://lop.parl.ca/sites/ParlInfo/default/en_CA/People/parliamentarians
# Last pulled September 1, 2019
# Some dates are incorrectly entered - these are managed later
original_senators <- read_xlsx("data/raw/senators.xlsx") %>% mutate(role = "sen")
original_mps <- read_xlsx("data/raw/mps.xlsx") %>% mutate(role = "mp")

# Stack senators and MPs into one table (they are treated differently further
# down) and keep the raw columns under short names.
parls <- rbind(original_senators, original_mps) %>%
  transmute(name = Name,
            role,
            province = `Province/Territory`,
            gender = Gender,
            profession = Profession,
            cob = `Country of Birth`,
            party = `Political Affiliation`,
            type = `Type of Parliamentarian`)

# Some affiliation labels contain their own parentheses. The spell-splitting
# code later counts and splits on ")", so these labels are rewritten without
# the inner parentheses first.
parls <- parls %>%
  mutate(party = as.character(party)) %>%
  mutate(party = gsub("Non-affiliated \\(Independent Senators Group\\)",
                      "Independent Senators Group", party)) %>%
  mutate(party = gsub("Non-affiliated \\(Independent Reform\\)",
                      "Non-affiliated", party)) %>%
  mutate(party = gsub("Conservative \\(1867-1942\\)",
                      "Conservative", party)) %>%
  mutate(party = gsub("Unionist \\(Conservative and Liberal\\)",
                      "Unionist", party))

# Collapse the province text (which sometimes repeats the province) to a short
# code. Anything that matches none of the ten provinces - including missing
# values - falls through to "NWT".
parls <- parls %>%
  mutate(province = factor(case_when(
    grepl("British Columbia", province) ~ "BC",
    grepl("Alberta", province) ~ "AB",
    grepl("Saskatchewan", province) ~ "SK",
    grepl("Manitoba", province) ~ "MB",
    grepl("Ontario", province) ~ "ON",
    grepl("Quebec", province) ~ "QC",
    grepl("New Brunswick", province) ~ "NB",
    grepl("Nova Scotia", province) ~ "NS",
    grepl("Prince Edward Island", province) ~ "PEI",
    grepl("Newfoundland", province) ~ "NFL",
    TRUE ~ "NWT"
  )))

# The `party` and `type` strings hold one "label (begin - end)" entry per
# spell. Count the closing parentheses to learn how many spells a person has,
# then repeat the row once per spell; the .id column records which spell each
# copy stands for.
parls <- parls %>%
  mutate(count_party = str_count(pattern = "\\)", string = party)) %>%
  tidyr::uncount(count_party, .id = "count_party")

# Pick the spell belonging to each row: split on ")" and keep the piece whose
# position equals the row's spell index. vapply() over seq_len() creates the
# whole column in one assignment and is safe when the table has zero rows
# (the previous 1:nrow() loop was not).
parls$s_party <- vapply(
  seq_len(nrow(parls)),
  function(i) strsplit(parls$party[i], ")")[[1]][parls$count_party[i]],
  character(1)
)

# Same expansion for the parliamentary role spells
parls <- parls %>%
  mutate(count_role = str_count(pattern = "\\)", string = type)) %>%
  tidyr::uncount(count_role, .id = "count_role")

# A role index beyond the available pieces yields NA. An earlier, disabled
# variant fell back to the first role entry in that case.
parls$s_type <- vapply(
  seq_len(nrow(parls)),
  function(i) strsplit(parls$type[i], ")")[[1]][parls$count_role[i]],
  character(1)
)

# Map each spell onto a short code:
#   party_short - party family; the first matching rule wins, and a spell that
#                 matches no rule becomes NA
#   type_short  - "mp" or "sen", read from the role spell text (NA otherwise)
parls <- parls %>%
  mutate(
    party_short = factor(case_when(
      grepl("Liberal", s_party) ~ "LPC",
      grepl("Progressive Conservative", s_party) ~ "CPC",
      grepl("Conservative Party of Canada", s_party) ~ "CPC",
      grepl("People's Party of Canada ", s_party) ~ "CPC",
      grepl("Independent Senators Group", s_party) ~ "ISG",
      grepl("Québec debout", s_party) ~ "BQ",
      grepl("Bloc Québécois", s_party) ~ "BQ",
      grepl("Groupe parlementaire québécois", s_party) ~ "BQ",
      grepl("New Democratic Party", s_party) ~ "NDP",
      grepl("Co-operative Commonwealth Federation", s_party) ~ "NDP",
      grepl("Non-affiliated", s_party) ~ "NONE",
      grepl("Independent", s_party) ~ "NONE",
      grepl("Strength in Democracy", s_party) ~ "BQ",
      grepl("Green Party of Canada", s_party) ~ "Green",
      TRUE ~ NA_character_
    )),
    type_short = factor(case_when(
      grepl("MP ", s_type) ~ "mp",
      grepl("Senator", s_type) ~ "sen"
    ))
  )

# Break each "<label> (<begin> - <end>" fragment into its label and its two
# date strings, for the party spell and for the role spell. Author's note:
# separate() reports problems on 17 rows here, all of them old MPs.
parls <- parls %>%
  separate(s_party, c("party_junk","dates_party"), remove = FALSE, sep = " \\(") %>%
  separate(dates_party, c("begin_p","end_p"), remove = FALSE, sep = " - ") %>%
  separate(s_type, c("type_junk","dates_type"), remove = FALSE, sep = " \\(") %>%
  separate(dates_type, c("begin_t","end_t"), remove = FALSE, sep = " - ")

# Hand-entered start dates for seven parliamentarians (presumably the records
# whose dates are mis-entered in the raw file). Everyone else keeps `current`.
fix_begin_date <- function(name, current) {
  case_when(name == "McLeod, Cathy" ~ as.Date("2008-10-14"),
            name == "Dhalla, Ruby" ~ as.Date("2004-06-28"),
            name == "Payne, LaVar" ~ as.Date("2008-10-14"),
            name == "Siksay, Bill" ~ as.Date("2004-06-28"),
            name == "St. Amand, Lloyd" ~ as.Date("2004-06-28"),
            name == "Stewart, Kennedy" ~ as.Date("2011-05-02"),
            name == "Valley, Roger" ~ as.Date("2004-06-28"),
            TRUE ~ current)
}

parls <- parls %>%
  # Turn the four date strings into Date values
  mutate(begin_p = as_date(begin_p),
         end_p = as_date(end_p),
         begin_t = as_date(begin_t),
         end_t = as_date(end_t)) %>%
  # Apply the manual corrections to both the party and the role spell.
  # Cathy McLeod's end dates are additionally set to the day the script runs.
  mutate(begin_p = fix_begin_date(name, begin_p),
         begin_t = fix_begin_date(name, begin_t),
         end_p = case_when(name == "McLeod, Cathy" ~ today(), TRUE ~ end_p),
         end_t = case_when(name == "McLeod, Cathy" ~ today(), TRUE ~ end_t))


# Spells with no end date that began on or after 1980-01-01 are closed on
# 2019-09-10 (the last recorded lobbying activity). Open spells that began
# earlier keep a missing end date; the author notes these are mostly older
# parliamentarians whose dates failed to parse, with the rest fixed manually.
parls <- parls %>%
  mutate(
    end_p = case_when(
      is.na(end_p) & begin_p >= as.Date("1980-01-01") ~ as.Date("2019-09-10"),
      TRUE ~ end_p),
    end_t = case_when(
      is.na(end_t) & begin_t >= as.Date("1980-01-01") ~ as.Date("2019-09-10"),
      TRUE ~ end_t)
  )

parls <- parls %>%
  # Keep role spells that end on or after the first lobbying record
  filter(end_t >= min(lobby_data$date, na.rm = TRUE)) %>%
  # Keep rows where the role spell matches the source file, the party and role
  # spells overlap in time, and both end after 2008-01-01. Rows where any of
  # these comparisons is NA are dropped.
  filter(type_short == role,
         end_t >= begin_p & end_p >= begin_t,
         end_p > as.Date("2008-01-01") & end_t > as.Date("2008-01-01"))

# Party breakdown of the remaining senator rows
parls %>% filter(role == "sen") %>% pull(party_short) %>% summary()

# Overall tenure per person and role: earliest role start and latest role end
parls_initial <- parls %>%
  group_by(name, role) %>%
  summarize(start = min(begin_t, na.rm = TRUE),
            leave = max(end_t, na.rm = TRUE))

parls <- parls %>%
  left_join(parls_initial, by = c("name","role"))

# Keep only the columns needed downstream. `begin`/`end` bound the party
# spell, `start`/`leave` bound the whole tenure. For senators:
#   independent = 1 when the spell is ISG or NONE, else 0
#   treated     = 1 when the spell is ISG, else 0
# Both flags are NA for MPs.
parls <- parls %>%
  transmute(name,
            birth_country = cob,
            province,
            role,
            party = party_short,
            begin = begin_p,
            end = end_p,
            start,
            leave,
            gender,
            independent = case_when(
              party_short %in% c("NONE","ISG") & role == "sen" ~ 1,
              role == "sen" ~ 0,
              TRUE ~ NA_real_),
            treated = case_when(
              party_short %in% c("ISG") & role == "sen" ~ 1,
              role == "sen" ~ 0,
              TRUE ~ NA_real_))

# Check to see if treatment has been applied (should be 63)
parls %>% filter(role == "sen") %>% group_by(treated) %>% count()
parls %>% filter(role == "sen") %>% group_by(independent) %>% count()

# Standardised matching key "<last name>_<given names>": the text before the
# first comma, an underscore, then the text after the last ", ".
parls <- parls %>%
  mutate(name_parls = paste(gsub(",.*$", "", name),
                            gsub(".*\\, ", "", name),
                            sep = "_"))

# Attribute each senator to the Prime Minister in office on the senator's
# tenure start date, and to that PM's party. MPs get NA for both columns.
# The rules are evaluated top to bottom, so each cutoff is the last day
# attributed to that PM.
# The Harper cutoff previously read 2014/11/03, which left every start date
# between 2014-11-04 and 2015-11-03 unassigned; it now runs up to the day
# before the Justin Trudeau rule begins (2015/11/04).
parls_appointment <- parls %>%
  group_by(name_parls, role, begin) %>%
  summarize(start_date = mean(start)) %>%
  # Drop the leftover grouping so later steps see a plain table
  ungroup() %>%
  mutate(appointing_PM = case_when(
           start_date <= as.Date("1979/06/03") & role == "sen" ~ "Pierre_Trudeau",
           start_date <= as.Date("1980/02/02") & role == "sen" ~ "Joe_Clark",
           start_date <= as.Date("1984/06/29") & role == "sen" ~ "Pierre_Trudeau",
           start_date <= as.Date("1984/09/16") & role == "sen" ~ "John_Turner",
           start_date <= as.Date("1993/06/25") & role == "sen" ~ "Brian_Mulroney",
           start_date <= as.Date("2003/12/11") & role == "sen" ~ "Jean_Chretien",
           start_date <= as.Date("2006/02/06") & role == "sen" ~ "Paul_Martin",
           start_date <= as.Date("2015/11/03") & role == "sen" ~ "Stephen_Harper",
           start_date >= as.Date("2015/11/04") & role == "sen" ~ "Justin_Trudeau",
           TRUE ~ NA_character_),
         # John_Turner is in neither list, so his appointees get NA here
         appointing_party = case_when(
           appointing_PM %in% c("Jean_Chretien","Justin_Trudeau","Paul_Martin","Pierre_Trudeau") ~ "LPC",
           appointing_PM %in% c("Brian_Mulroney","Joe_Clark","Stephen_Harper") ~ "CPC"
         ))

# Tabulate senators by appointing PM and by appointing party
filter(parls_appointment, role == "sen") %>% pull(appointing_PM) %>% factor() %>% summary()
filter(parls_appointment, role == "sen") %>% pull(appointing_party) %>% factor() %>% summary()

# Attach the appointment columns to every matching party spell
parls <- left_join(parls,
               select(parls_appointment, name_parls, role, appointing_party, appointing_PM, begin),
               by = c("name_parls","role", "begin"))

# Duplicate rows crept in along the way: keep one row per distinct combination
# of the columns below, then drop one specific Ralph Goodale spell (the one
# beginning 2010-09-07).
parls <- parls %>%
  distinct(name, birth_country, province, role, party, begin, end, start, leave,
           gender, name_parls, appointing_party, appointing_PM,
           treated, independent) %>%
  filter(!(name == "Goodale, Ralph" & begin == "2010-09-07"))

# Sanity checks
# `matches`: senator keys that also occur among the MP keys
sen_list <- parls %>% filter(role == "sen") %>% pull(name_parls)
mp_list <- parls %>% filter(role == "mp") %>% pull(name_parls)
matches <- sen_list[sen_list %in% mp_list]

# `dups`: MP rows whose key occurs more than once
dups <- parls %>%
  filter(role == "mp") %>%
  group_by(name_parls) %>%
  filter(n() > 1)

```


#### Create daily weights

```{r}

# One row per calendar day in the study window
date_list <- data.frame(date = seq(as.Date("2008-01-01"), as.Date("2019-09-10"), 1))

# One row per senator (and gender) with tenure bounds and the averaged
# treated / independent flags. `independent` has to be summarised here as
# well: it was selected but then dropped by summarize(), so the daily
# sum(independent) further down had no column to read.
parls_sub <- filter(parls, role == "sen") %>%
  dplyr::select(name_parls, start, leave, treated, independent, gender) %>%
  group_by(name_parls, gender) %>%
  summarize(start = mean(start),
            leave = mean(leave),
            treated = mean(treated),
            independent = mean(independent),
            n = n()) %>%
  # Remove the leftover grouping before the join
  ungroup()

# Pair every day with every senator in office that day (start <= day < leave)
dates <- fuzzy_left_join(
  date_list, parls_sub,
    by = c("date" = "start",
           "date" = "leave"),
    match_fun = list(`>=`, `<`))

# Daily head counts. Inside summarize(), `treated` is already the daily total
# when `untreated` is computed, so untreated = senators that day - treated.
weights_per_day <- dates %>%
  group_by(date) %>%
  summarize(men = sum(ifelse(gender == "M", 1, 0)),
            women = sum(ifelse(gender == "F", 1, 0)),
            treated = sum(treated),
            independent = sum(independent),
            untreated = n() - sum(treated)) %>%
  mutate(total = treated + untreated,
         yes_isg = treated / total,
         non_isg = untreated / total)

# Visual check of the number of sitting senators over time
plot(weights_per_day$date, weights_per_day$total)


```

# Committee affiliations

```{r}

# Load the scraped committee data produced by scrape_pages.ipynb
coms <- read_csv("data/raw/scraped_full.csv")

# Remove the hyphen from co-chair / vice-chair titles
coms <- coms %>%
  mutate(coms = gsub("Vice-chair","Vicechair",coms),
         coms = gsub("Vice-Chair","Vicechair",coms),
         coms = gsub("Co-chair","Cochair",coms))

# `coms` holds one "position: committee (begin - end)" entry per membership.
# Count the closing parentheses (a missing string counts as one entry), repeat
# each row once per entry, and record in `count` which entry the copy stands
# for. `.id` must be a column name: the previous `.id = TRUE` followed by
# rename(count = X1) only worked by overwriting the first column, and it
# depended on the CSV reader naming that column "X1".
# NOTE(review): the reader-generated first column is now left in place;
# confirm nothing downstream expects it to be gone.
coms <- coms %>%
  mutate(count = str_count(pattern = "\\)", string = coms)) %>%
  mutate(count = ifelse(is.na(count), 1, count)) %>%
  tidyr::uncount(count, .remove = TRUE, .id = "count")

# Keep the entry that belongs to each row. seq_len() makes this safe for an
# empty table, unlike the previous 1:nrow() loop.
coms$committee <- vapply(
  seq_len(nrow(coms)),
  function(i) strsplit(coms$coms[i], ")")[[1]][coms$count[i]],
  character(1)
)

# Split the entry into position, committee name, begin and end
coms <- coms %>%
  separate(committee, c("position","committee"), remove = TRUE, sep = ":") %>%
  separate(committee, c("committee","dates"), remove = TRUE, sep = "\\(") %>%
  separate(dates, c("begin","end"), remove = TRUE, sep = " - ")

# Convert to dates; a missing end date becomes the day the script runs.
# ifelse() strips the Date class, hence the second as_date().
coms <- mutate(coms,
               begin = as_date(begin),
               end = str_squish(end),
               end = as_date(end),
               end = ifelse(is.na(end), today(), end),
               end = as_date(end))

# member = 1 for plain members, leader = 1 for any other position.
# NOTE(review): `position` is compared without trimming; if entries after the
# first carry leading whitespace they would be counted as leaders - verify
# against the scraped data.
coms <- coms %>%
  mutate(member = ifelse(position == "Member", 1, 0),
         leader = ifelse(position != "Member", 1, 0))

```

## Name matching

```{r}

# Here we create a name_parls variable that can be matched across the
# parliamentary data, the committee data, and the lobbying data.

# Strip titles, commas and spaces from a registry name before fuzzy matching.
# The tokens are removed as literal text (fixed = TRUE). Treated as regular
# expressions, the dots in "M.P.", "P.C.", "Hon.", "Dr." and "Mr." match any
# character, so e.g. "M.P." also removed "McPh" and "Dr." removed "Dro".
# The trailing step deletes every space, which also covers runs of spaces.
clean_lobby_name <- function(lobby_name) {
  tokens <- c("The", ",", "M.P.", "Honourable", "Honorable", "Senator",
              "P.C.", "Hon.", "Hon", "Dr.", "Mr.")
  cleaned <- lobby_name
  for (token in tokens) {
    cleaned <- gsub(token, "", cleaned, fixed = TRUE)
  }
  gsub(" ", "", cleaned, fixed = TRUE)
}

# For each cleaned name return the first approximate match among `candidates`
# (case-insensitive, max.distance = 0.15), or NA when nothing matches.
first_fuzzy_match <- function(cleaned, candidates) {
  vapply(cleaned, function(one_name) {
    hits <- agrep(one_name, candidates,
                  ignore.case = TRUE, value = TRUE, max.distance = 0.15)
    if (length(hits) == 0) NA_character_ else hits[1]
  }, character(1), USE.NAMES = FALSE)
}

# Candidate keys from the parliamentary data
sen_names <- filter(parls, role == "sen") %>% pull(name_parls) %>% unique()

mp_names <- filter(parls, role == "mp") %>% pull(name_parls) %>% unique()

# Distinct registry names per title, their cleaned form, and the basic match
lobby_data_names_sen <- data.frame(
  lobby_name = unique(filter(lobby_data,
                             lobbied_title == "Senator")$lobby_name),
  stringsAsFactors = FALSE)
lobby_data_names_sen$name_clean <- clean_lobby_name(lobby_data_names_sen$lobby_name)
lobby_data_names_sen$name_parls <- first_fuzzy_match(lobby_data_names_sen$name_clean,
                                                     sen_names)

lobby_data_names_mps <- data.frame(
  lobby_name = unique(filter(lobby_data,
                             lobbied_title == "Member of Parliament")$lobby_name),
  stringsAsFactors = FALSE)
lobby_data_names_mps$name_clean <- clean_lobby_name(lobby_data_names_mps$lobby_name)
lobby_data_names_mps$name_parls <- first_fuzzy_match(lobby_data_names_mps$name_clean,
                                                     mp_names)

# Now merge the matched key back onto the lobby data
lobby_data_sen <- filter(lobby_data, lobbied_title == "Senator") %>% mutate(role = "sen")
lobby_data_sen <- merge(lobby_data_sen, select(lobby_data_names_sen, -name_clean), by = "lobby_name")

lobby_data_mps <- filter(lobby_data, lobbied_title == "Member of Parliament") %>% mutate(role = "mp")
lobby_data_mps <- merge(lobby_data_mps, select(lobby_data_names_mps, -name_clean), by = "lobby_name")

# Check how many communications are still unmatched. The counts noted below
# were recorded with the earlier regex-based cleaning and may shift slightly.
filter(lobby_data_sen, is.na(name_parls)) %>% dim() # 340 Senators
filter(lobby_data_mps, is.na(name_parls)) %>% dim() # 608 MPs
```

### Manual name fixes
```{r}

# Manual overrides for registry names the fuzzy match gets wrong or misses.
# The left side is the registry spelling ("<last>_<first>" as entered), the
# right side is the key used in `parls`. The first matching rule wins and
# unlisted names keep their fuzzy-match result.
# Changes from the previous version:
#   - "Bob_Runciman" pointed at the misspelled "Runicman_Barry", which is
#     itself listed below as a registry typo; it now points at "Runciman_Bob".
#   - "Graham_David de Burgh" pointed at Royal Galipeau's key (copied from the
#     line above it). NOTE(review): it now maps to its own name - confirm the
#     exact key for this MP in parls$name_parls.
#   - Two repeated conditions that could never fire were removed.
# NOTE(review): several entries are MPs, yet the overrides are applied only to
# the communications titled "Senator"; confirm whether lobby_data_mps should
# receive them as well.
lobby_data_sen <- mutate(lobby_data_sen,
                         name_parls = case_when(
                           lobby_name == "Andreychuk_Andrea"  ~ "Andreychuk_Raynell",
                           lobby_name == "Angus_David" ~ "Angus_W. David",
                           lobby_name == "Angus_David W"  ~ "Angus_W. David",
                           lobby_name == "Angus_David W."  ~ "Angus_W. David",
                           lobby_name == "Baker_James" ~ "Baker_George",
                           lobby_name == "Black_David" ~ "Black_Robert",
                           lobby_name == "Bob_Runciman" ~ "Runciman_Bob",
                           lobby_name == "Brwon_Burt" ~ "Brown_Bert",
                           lobby_name == "Céline_Hervieux-Payette" ~ "Hervieux-Payette_Céline",
                           lobby_name == "Comeau_George" ~ "Comeau_Gerald J.",

                           lobby_name == "Cordy_Jane Marie"  ~ "Cordy_Jane",
                           lobby_name == "Cordy_Jane MArie"  ~ "Cordy_Jane",

                           lobby_name == "Cowan_John"  ~  "Cowan_James S.",
                           lobby_name == "David_Wells"  ~  "Wells_David M.",
                           lobby_name == "Elizabeth_May" ~ "May_Elizabeth",
                           lobby_name == "Enverga Jr._Tobias C." ~ "Enverga_Tobias C." ,
                           lobby_name == "Enverga_Tobias C. Jr." ~ "Enverga_Tobias C." ,
                           lobby_name == "Fortin_Suzanne" ~ "Fortin-Duplessis_Suzanne",
                           lobby_name == "Falcon-Ouelette_Robert" ~ "Ouellette_Robert-Falcon",
                           lobby_name == "Falcon-Oulette_Robert" ~ "Ouellette_Robert-Falcon",
                           lobby_name == "Galipeau_Royal" ~ "Galipeau_Joseph Bernard Royal Neil",
                           lobby_name == "Graham_David de Burgh" ~ "Graham_David de Burgh",
                           lobby_name == "Grant_Mitchell" ~ "Mitchell_Grant",
                           lobby_name == "Green-Raine_Nancy" ~ "Raine_Nancy Greene",
                           lobby_name == "Green Raine_Nancy" ~ "Raine_Nancy Greene",
                           lobby_name == "Green_Nancy" ~ "Raine_Nancy Greene",
                           lobby_name == "Greene-Raine_Nancy" ~ "Raine_Nancy Greene",
                           lobby_name == "Greene Raine_Nancy" ~ "Raine_Nancy Greene",
                           lobby_name == "Green Raine_Hon. Nancy" ~ "Raine_Nancy Greene",
                           lobby_name == "Rain Green_Nancy" ~ "Raine_Nancy Greene",
                           lobby_name == "Raine (Greene)_Nancy" ~ "Raine_Nancy Greene",
                           lobby_name == "Raine_Greene" ~ "Raine_Nancy Greene",
                           lobby_name == "Green_Steven" ~ "Greene_Stephen",

                           lobby_name == "Peter_Harder" ~ "Harder_Peter",

                           lobby_name == "Hoepner_Candice" ~ "Bergen_Candice",
                           lobby_name == "Hoeppner_Candice" ~ "Bergen_Candice",
                           lobby_name == "Hai Ngo_Thanh" ~ "Ngo_Thanh Hai",
                           lobby_name == "Leitch_Kelly" ~ "Leitch_K. Kellie",
                           lobby_name == "McInnis_Thomas Johnson" ~ "McInnis_Tom",
                           lobby_name == "Mihn-Thu Quac_Anne" ~ "Quach_Anne Minh-Thu",
                           lobby_name == "Minh-Chu Quach_Anne" ~ "Quach_Anne Minh-Thu",
                           lobby_name == "Minh-thu Quach_Anne" ~ "Quach_Anne Minh-Thu",
                           lobby_name == "Minh-Thu Quach_Anne" ~ "Quach_Anne Minh-Thu",

                           lobby_name == "Nutall_Alexander" ~ "Nuttall_Alex",
                           lobby_name == "Nuttal_Alexander" ~ "Nuttall_Alex",
                           lobby_name == "Nuttall_Alexander" ~ "Nuttall_Alex",

                           lobby_name == "MacInnis_Thomas" ~ "McInnis_Tom",

                           lobby_name == "Moore_Sen. Wilfred" ~ "Moore_Wilfred P.",
                           lobby_name == "Moore_senator Wilfred" ~ "Moore_Wilfred P.",
                           lobby_name == "Moore_William" ~ "Moore_Wilfred P.",

                           lobby_name == "Munson_Sen. Jim" ~ "Munson_Jim",

                           lobby_name == "Ogilvie_Kenneth" ~ "Ogilvie_Kelvin Kenneth",
                           lobby_name == "Oglivy_Kevin" ~ "Ogilvie_Kelvin Kenneth",
                           lobby_name == "Oglivie_Kevin" ~ "Ogilvie_Kelvin Kenneth",
                           lobby_name == "Kelvin_Ogilvie" ~ "Ogilvie_Kelvin Kenneth",
                           lobby_name == "Oliver_David" ~ "Oliver_Donald H.",

                           lobby_name == "Obnivar_Ratna" ~ "Omidvar_Ratna",

                           lobby_name == "Pau Woo_Yuen" ~ "Woo_Yuen Pau",

                           lobby_name == "Ringuette-Maltais_Pierette" ~ "Ringuette_Pierrette",
                           lobby_name == "Ringuette Maltais_Pierrette" ~ "Ringuette_Pierrette",

                           lobby_name == "Runciman_Robert" ~ "Runciman_Bob",
                           lobby_name == "Runicman_Barry" ~ "Runciman_Bob",
                           lobby_name == "Seidman_Judith Seidman" ~ "Seidman_Judith G.",
                           lobby_name == "Singh Kang_Darshan" ~ "Kang_Darshan Singh",
                           lobby_name == "Wladyslaw_Lizan" ~ "Lizon_Wladyslaw",
                           lobby_name == "Wells_David" ~ "Wells_David M.",
                           lobby_name == "Wells_David Mark" ~ "Wells_David M.",
                           lobby_name == "Wells_Mark" ~ "Wells_David M.",
                           lobby_name == "Tkachuk_David" ~ "Tkachuk_David",
                           lobby_name == "Ringuette-Maltais_Pierrette" ~ "Ringuette_Pierrette",
                           lobby_name == "Rivest_Louis" ~ "Rivest_Jean-Claude",
                           lobby_name == "Sinclair_Mr. Justice Murray" ~ "Sinclair_Murray",
                           lobby_name == "Olsen Steward_Carolyn" ~ "Stewart Olsen_Carolyn",
                           lobby_name == "Olsen Stewart_Carolyn" ~ "Stewart Olsen_Carolyn",
                           lobby_name == "Olsen_Carolyn Stewart" ~ "Stewart Olsen_Carolyn",
                           lobby_name == "Olsen_Stewart" ~ "Stewart Olsen_Carolyn",
                           lobby_name == "Thomas Bernard_Hon. Wanda Elaine" ~ "Bernard_Wanda Thomas",
                           lobby_name == "Thomas Bernard_Wanda" ~ "Bernard_Wanda Thomas",
                           TRUE ~ name_parls
                         ))

# Communications that are still unmatched, for manual inspection. The senator
# check gets its own object; previously it was overwritten straight away by
# the MP check. `lobby_data_check` keeps holding the MP rows as before.
lobby_data_check_sen <- filter(lobby_data_sen, is.na(name_parls)) %>%
  select(lobby_name, date:lobbied_fn, name_parls)

lobby_data_check <- filter(lobby_data_mps, is.na(name_parls)) %>%
  select(lobby_name, date:lobbied_fn, name_parls)

# Rebind lobby_data
lobby_data <- rbind(lobby_data_sen, lobby_data_mps)
```

## Now bring everything together

```{r}

# Can do a spread if we want to know specific committees - just set up for a count right now
#coms <- spread(coms, key = "committee", value = "position")

# One pass per legislator found in the committee data
name_parls <- unique(coms$name_parls)

# Bookkeeping table, allocated up front: one row per legislator with the
# number of lobbying events attached (kept as text, NA when the legislator is
# skipped).
no_events <- data.frame(name_parls = name_parls,
                        events = rep(NA_character_, length(name_parls)),
                        stringsAsFactors = FALSE)

# One daily data frame per legislator; skipped legislators stay NULL and are
# ignored when the list is stacked.
com_expand <- vector("list", length(name_parls))

# MAIN MERGE LOOP
# TAKES A LONG TIME BUT MERGES EVERYTHING ON DAILY BASIS
# For a quick test run, loop over a handful of indices instead, e.g. 1:5.
# seq_along() also copes with an empty list of legislators.
for (i in seq_along(name_parls)) {

  # Get Parliamentarian we are working with
  chosen_one <- name_parls[i]

  parls_sub <- filter(parls, name_parls == chosen_one)
  lobby_sub <- filter(lobby_data, name_parls == chosen_one)
  comms_sub <- filter(coms, name_parls == chosen_one)

  # Nothing to build for people missing from the parliamentary data
  if (nrow(parls_sub) == 0) {
    print(paste(i,chosen_one," does not appear in the parls df.", sep = ":"))
    next
  }

  # One row per day between the first party-spell start and the last end
  df <- expand.grid(date = seq.Date(min(parls_sub$begin, na.rm = TRUE), max(parls_sub$end), by = "day"),
                    name_parls = chosen_one)

  # Add in party affiliation and parliamentary specific data: each day takes
  # the spell with begin < day <= end
  df <- fuzzy_join(
    df, parls_sub,
    by = c("name_parls",
           "date" = "begin",
           "date" = "end"),
    match_fun = list(`==`, `>`, `<=`)) %>%
    rename(name_parls = name_parls.x) %>% select(-name_parls.y)

  # Add in committee memberships active on each day
  df <- fuzzy_left_join(
    df, comms_sub,
    by = c("name_parls",
           "date" = "begin",
           "date" = "end"),
    match_fun = list(`==`, `>`, `<=`)) %>%
    rename(name_parls = name_parls.x,
           name = name.x,
           begin = begin.x,
           end = end.x,
           role = role.x) %>%
    select(-name.y, -name_parls.y, -begin.y, -end.y, -role.y, -url, -coms_num, -coms)

  # Collapse to one row per day: count memberships and leadership positions.
  # ungroup() removes the grouping summarize() leaves behind so merge() below
  # works on a plain table.
  df <- df %>%
    group_by(date,name_parls,role,name,province,party,begin,end,gender,
             treated, start, leave,appointing_PM,appointing_party,name_on_page) %>%
    summarize(member = sum(member, na.rm = TRUE),
              leader = sum(leader, na.rm = TRUE)) %>%
    ungroup()

  # Now add all these characteristics to the lobby data (days without a
  # lobbying event are kept, with NA lobby columns)
  df <- merge(df, lobby_sub, by = c("date","name_parls"), all.x = TRUE)

  # Add the daily weights for senators being from the ISG or not; days that
  # are absent from weights_per_day are dropped by this inner merge
  df <- merge(df, select(weights_per_day, date, yes_isg, non_isg), by = c("date"))

  # Report how many lobbying events were attached
  num_events <- sum(!is.na(df$lobby_id))
  print(paste0(i,":",chosen_one,". Have added ", num_events, " lobbying events."))
  no_events$events[i] <- as.character(num_events)

  # And then put into the list
  com_expand[[i]] <- df

}

# Stack the per-legislator tables into one daily data set
daily_data <- data.table::rbindlist(com_expand)

```

# Add number of ISG Senators

```{r}

# Calendar of days covering the lobbying window
date_list <- data.frame(date = seq(as.Date("2010-09-01"), as.Date("2019-09-10"), by = 1))

# One row per senator and independence status (ind = 1 for ISG or NONE
# spells), spanning the earliest begin to the latest end of those spells.
# To debug on a few people, add e.g.
#   name_parls %in% c("Brazeau_Patrick","Bernard_Wanda Thomas","Ringuette_Pierrette")
# to the filter below.
parls_sub <- parls %>%
  dplyr::filter(role == "sen") %>%
  ungroup() %>%
  mutate(ind = ifelse(party %in% c("ISG","NONE"), 1, 0)) %>%
  dplyr::select(name_parls, begin, end, ind, party) %>%
  group_by(name_parls, ind) %>%
  summarize(begin = min(begin),
            end = max(end),
            independent = mean(ind),
            n = n())

# Pair each day with the senator rows active on it (begin <= day < end)
dates <- fuzzy_left_join(
  date_list, parls_sub,
  by = c("date" = "begin",
         "date" = "end"),
  match_fun = list(`>=`, `<`))

# Daily counts of independent and partisan senators and the independent share.
# Inside summarize(), `independent` is already the daily total when `partisan`
# is computed. The row for 2019-07-01 is removed.
weights_per_day <- dates %>%
  group_by(date) %>%
  summarize(independent = sum(independent, na.rm = TRUE),
            partisan = n() - sum(independent, na.rm = TRUE)) %>%
  mutate(total_sens = independent + partisan,
         prop_independent = independent / total_sens) %>%
  filter(date != "2019-07-01")

# Visual check of the number of senators per day
plot(weights_per_day$date, weights_per_day$total_sens)

# Attach the daily counts to the legislator-day data
daily_data <- merge(daily_data, weights_per_day, by = "date")

```

# Export daily data

```{r}

#daily_data %>% saveRDS("./data/daily_data_full.RDS")
#daily_data <- readRDS("./data/daily_data_full.RDS")

# Keep only the columns used downstream (role.x is carried forward as role)
# and flag senators whose party is "ISG" or "NONE" as independent (ind = 1)
dat <- daily_data %>%
  ungroup() %>%
  select(
    date, party, name_parls, role = role.x, gender, province, start, leave,
    appointing_PM, appointing_party, member, leader, treated,
    lobby_org_english, lobby_org_french, lobby_id,
    non_isg, yes_isg, partisan, independent, total_sens, prop_independent
  ) %>%
  mutate(ind = as.numeric(party %in% c("ISG", "NONE") & role == "sen"))

```

# Convert to monthly and export monthly

```{r}

#saveRDS(dat, "./data/working/daily_data_selection.RDS")
#dat <- readRDS("./data/working/daily_data_selection.RDS")

# Count lobbying contacts per parliamentarian and calendar month.
# (To debug on a few senators, first filter dat on e.g.
#  name_parls %in% c("Brazeau_Patrick","Bernard_Wanda Thomas","Ringuette_Pierrette"))
month_data <- dat %>%
  mutate(
    year_month = format(as.Date(date), "%Y-%m"),
    # 1 when either the current party or the appointing party is CPC
    con_affiliation = case_when(
      party == "CPC" | appointing_party == "CPC" ~ 1,
      TRUE ~ 0
    ),
    # 1 when either the current party or the appointing party is LPC
    lib_affiliation = case_when(
      party == "LPC" | appointing_party == "LPC" ~ 1,
      TRUE ~ 0
    ),
    senator = as.numeric(role == "sen"),
    appointed_after = as.numeric(role == "sen" & start >= "2016-03-10"),
    og_isg = as.numeric(
      name_parls %in% c(
        "Demers_Jacques",
        "Wallace_John D.",
        "Rivard_Michel",
        "Bellemare_Diane",
        "McCoy_Elaine",
        "Ringuette_Pierrette"
      )
    )
  ) %>%
  # Most of these variables are constant within a person-month; the ones that
  # are not produce extra rows, which are collapsed further below
  group_by(
    year_month, name_parls, senator, gender, province, treated,
    appointing_party, appointing_PM, appointed_after, start, og_isg,
    member, leader, con_affiliation, lib_affiliation,
    partisan, independent, total_sens, ind
  ) %>%
  summarise(total = sum(!is.na(lobby_id)))

# Need to remove the early years where mandatory tracking not required:
# keep months strictly after the start month, inside Oct 2010 - Sep 2019
month_data <- month_data %>%
  ungroup() %>%
  mutate(year_month = as.yearmon(year_month)) %>%
  filter(
    year_month > as.yearmon(start),
    year_month > as.yearmon("Sep 2010"),
    year_month < as.yearmon("Oct 2019")
  )

# If people add committees during a month, then they appear twice. The contact
# counts of those rows are summed and the remaining variables are averaged
month_data2 <- month_data %>%
  group_by(
    year_month, name_parls, senator, gender, province, treated,
    appointing_party, appointing_PM, appointed_after, start, og_isg,
    con_affiliation, lib_affiliation
  ) %>%
  summarise(
    total = sum(total),
    member = mean(member),
    leader = mean(leader),
    independent = mean(independent),
    partisan = mean(partisan),
    total_sens = mean(total_sens),
    ind = mean(ind)
  )

```


# Add monthly variables and export

```{r}
# Add month-level covariates to the monthly panel and save it.
#
# Cutoff months are written as "YYYY-MM" so that parsing them with
# as.yearmon() does not depend on the locale's month names. On the numeric
# yearmon scale a month is a decimal year (Jan 2016 = 2016, each further
# month adds 1/12).

# 1 from the `cutoff` month onwards, 0 before it
from_month <- function(ym, cutoff) {
  ifelse(as.numeric(ym) >= as.numeric(as.yearmon(cutoff)), 1, 0)
}

# Whole months elapsed since the `cutoff` month (1, 2, 3, ...); 0 up to and
# including the cutoff month. The offset is taken from the cutoff month itself
# rather than from a rounded decimal-year literal, and rounded, so that every
# trend variable counts whole months on the same scale.
months_since <- function(ym, cutoff) {
  pmax(round((as.numeric(ym) - as.numeric(as.yearmon(cutoff))) * 12), 0)
}

month_data2 <- month_data2 %>%
  ungroup() %>%
  mutate(
    year = as.numeric(format(as.yearmon(year_month), "%Y")),
    trudeau_gov = from_month(year_month, "2015-11"),
    # ISG indicators: actual month (Mar 2016) plus lead / lag versions
    ISG_lead = from_month(year_month, "2015-12"),
    ISG_lag = from_month(year_month, "2016-06"),
    ISG = from_month(year_month, "2016-03"),
    ISG_linear = months_since(year_month, "2016-03"),
    # IAB indicators: actual month (Jan 2016) plus lead / lag versions
    IAB_lead = from_month(year_month, "2015-10"),
    IAB_lag = from_month(year_month, "2016-04"),
    IAB = from_month(year_month, "2016-01"),
    IAB_linear = months_since(year_month, "2016-01"),
    # Time since the start month, in years
    tenure_years = as.numeric(year_month) - as.numeric(as.yearmon(start)),
    # log(0) is undefined, so zero tenure is left missing
    log_tenure = ifelse(tenure_years != 0, log(tenure_years), NA_real_),
    senior = ifelse(tenure_years >= 10, 1, 0)
  )

# Make sure the output folder exists before writing
dir.create("./data/working", recursive = TRUE, showWarnings = FALSE)
saveRDS(month_data2, "./data/working/month_data.RDS")

```
